# Packages
require(ggplot2)
require(plotly)
require(geojsonio)
require(sp)
require(sf)
require(rvest)
require(RSelenium)
require(htmltools)
require(ggmap)
# Fetch the NJ county boundaries (GeoJSON) and store them as a local file
file <- "NJ_counties.geojson"
url <- "https://opendata.arcgis.com/datasets/5f45e1ece6e14ef5866974a7b57d3b95_1.geojson"
download.file(url = url, destfile = file)
# The URL is only needed for the download itself
rm(url)
# Read the local GeoJSON file into `NJ_Counties` via geojson_sf()
file <- "NJ_counties.geojson"
NJ_Counties <- file %>% geojson_sf()
# The path is not needed once the data is loaded
rm(file)
# Keep only the county attributes used later on, some under shorter names
NJ_Counties_Cleaned <- transmute(
  NJ_Counties,
  county = COUNTY,
  CO = CO,
  pop = POP2010,
  popdensity = POPDEN2010,
  Shape_Length = Shape_Length,
  Shape_Area = Shape_Area,
  GNIS = GNIS
)
# Get page source from website
gc()
# Start a Selenium server plus a Firefox session on a fixed port and keep a
# handle on the browser client.
driver <- rsDriver(browser = c("firefox"), port = 44454L)
remote_driver <- driver[["client"]]
# `page` is the value returned by getPageSource(); later code reads the HTML
# string from page[[1]]. The browser session and the Selenium server are shut
# down in `finally` so they are released even when navigation fails.
page <- tryCatch(
  {
    remote_driver$navigate("https://www.childrens-specialized.org/locations-directory/?")
    remote_driver$getPageSource()
  },
  finally = {
    remote_driver$close()
    driver[["server"]]$stop()
  }
)
# Retrieve information from directory
# Each directory entry is addressed as Xpathgen1 + <index> + Xpathgen2. The
# entry's name is read from its <h2>, its address from its <h3>, and the
# opening hours from child <div>s whose class ends in "-Hours" (the part
# before "-Hours" is taken as the weekday).
Xpathgen1 <- "/html/body/div[1]/div/div/div[2]/div/div[2]/div["
Xpathgen2 <- "]/div/div[2]/article"
weekday_names <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                   "Saturday", "Sunday")
# Parse the page source once instead of once per lookup
page_html <- read_html(page[[1]])
# Collect one single-row data frame per entry and bind them after the loop
Hosinfo_rows <- vector("list", 15L)
for (i in seq_len(15L)) {
  XPath <- paste0(Xpathgen1, i, Xpathgen2)
  Node <- html_nodes(page_html, xpath = XPath)
  if (length(Node) == 0L) {
    warning("No directory entry found at position ", i, "; skipping it.",
            call. = FALSE)
    next
  }
  name <-
    Node[[1]] %>%
    html_node("h2") %>%
    html_text()
  address <-
    Node[[1]] %>%
    html_node("h3") %>%
    html_text() %>%
    gsub(pattern = "\n *", replacement = " ", x = .)
  # Start every weekday as NA so that a day missing from this entry cannot
  # inherit the hours scraped for a previous entry.
  hours <- setNames(
    as.list(rep(NA_character_, length(weekday_names))),
    weekday_names
  )
  # Separate index `j` so the outer loop variable is not shadowed
  for (j in seq_along(weekday_names)) {
    day_node <- html_nodes(page_html, xpath = paste0(XPath, "/div[", j, "]"))
    day <-
      day_node %>%
      html_attr("class") %>%
      grep("-Hours", x = ., value = TRUE) %>%
      gsub("-Hours", "", x = .)
    times <-
      day_node %>%
      html_node("h3") %>%
      html_text()
    # Only record a value when exactly one recognised weekday was found
    if (length(day) == 1L && length(times) == 1L && day %in% weekday_names) {
      hours[[day]] <- times
    }
  }
  # The named list expands into the columns Monday ... Sunday
  Hosinfo_rows[[i]] <- data.frame(name, address, hours,
                                  stringsAsFactors = FALSE)
}
# Drop skipped positions, then bind all rows in a single step
Hosinfo_rows <- Filter(Negate(is.null), Hosinfo_rows)
Hosinfo <- if (length(Hosinfo_rows) > 0L) {
  do.call(rbind, Hosinfo_rows)
} else {
  data.frame()
}
# Persist the scraped hospital table to disk as CSV
write.csv(x = Hosinfo, file = "Hospitals.csv")
# Data Wrangling
# Reload the scraped table from disk, keep the name and address columns, and
# geocode each address. The plotting code further down reads the `lon` and
# `lat` columns from the saved result.
Hosinfo <- read.csv("Hospitals.csv")
Hosloc <-
  Hosinfo %>%
  select(name, address) %>%
  mutate_geocode(address) # Requires google API key
# Save the geocoded table so later steps can reload it from disk
write.csv(Hosloc, "Hospitalsloc.csv")
# Map of NJ
# Plain county outline map with axis ticks and axis text hidden.
NJ <-
  NJ_Counties_Cleaned %>%
  ggplot() +
  geom_sf() +
  theme(
    # "none" (lower case) is the documented value for hiding the legend
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    title = element_text(size = 9)
  ) +
  labs(
    y = "",
    x = "",
    title = "Map of NJ divided by county"
  )
# Show the map three ways: interactive plotly widget, static ggplot, and the
# plot() method of the sf object.
ggplotly(NJ)
NJ
plot(NJ_Counties_Cleaned)
# Population heat maps of NJ using 2010 population data
# Counties are filled on a white-to-blue gradient by their `pop` value.
NJ <-
  NJ_Counties_Cleaned %>%
  ggplot() +
  scale_fill_gradientn(colours = c("white", "blue")) +
  geom_sf(aes(fill = pop)) +
  theme(
    # "none" (lower case) is the documented value for hiding the legend
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    title = element_text(size = 9)
  ) +
  labs(
    y = "",
    x = "",
    title = "2010 Population Heatmap"
  )
# Show the heatmap three ways: interactive plotly widget, static ggplot, and
# the plot() method of the sf object restricted to the `pop` column.
ggplotly(NJ)
NJ
plot(NJ_Counties_Cleaned["pop"])
# Location of Hospitals in NJ on heatmap
Hosloc <- read.csv("Hospitalsloc.csv")
# Only need out patient centers
Hosloc <-
  Hosloc %>%
  filter(grepl("Outpatient", name))
# Population heatmap with one point per outpatient center.
NJ <-
  NJ_Counties_Cleaned %>%
  ggplot() +
  scale_fill_gradientn(colours = c("white", "blue")) +
  geom_sf(aes(fill = pop)) +
  geom_point(
    data = Hosloc,
    # `label` is not used by geom_point itself (ggplot2 warns
    # "Ignoring unknown aesthetics: label"); it is kept so that ggplotly()
    # can show the hospital name in the tooltip.
    aes(x = lon, y = lat, label = name),
    # Constant appearance belongs outside aes(); inside aes() the strings
    # would be mapped as data values instead of being applied.
    shape = 15, # filled square
    color = "orange"
  ) +
  theme(
    # "none" (lower case) is the documented value for hiding the legend
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    title = element_text(size = 9, family = "serif")
  ) +
  labs(
    y = "",
    x = "",
    title = "2010 Population Heatmap\nwith CSH Outpatient Center"
  )
ggplotly(NJ, tooltip = "name")
NJ
# Save the static map as a PNG
ggsave(filename = "PopheatmapNJ.png", plot = NJ, width = 3, height = 4, dpi = 300)